LOB

  • Feedforward model with three hidden layers.

  • Trained using only the LOB features, without the liquidity measures: input dimension is 60 x 21 = 1260

  • Mean squared error used as loss function.

  • Optimizer: Adam

  • Training set consists of 202 days (~80%)

  • Validation set consists of 51 days (~20%)

Shown below:

  • Network structure

  • Settings used for training

  • Plots of model performance

Network Structure

In [1]:
import os
import torch
from torchsummary import summary
from IPython.display import HTML
from configs.train_config import cfg
import importlib.util

# Infer which feature set this notebook evaluates from the notebook's own
# filename (e.g. 'evaluation_LOB.ipynb' -> trained_with == 'LOB').
evaluation_files = [entry for entry in os.listdir('./') if entry.count('evaluation')]
file_name = evaluation_files[0].split('.')[0]
trained_with = ''.join(file_name.split('evaluation_'))

# Per-minute input feature dimension for each feature set.
dim_dict = {'LOB': 21, 'LIQ': 24, 'LOB+LIQ': 44}

def hide_code():
    """Return an HTML button that toggles the visibility of the notebook's code cells."""
    toggle_snippet = '''<script>code_show=true; function code_toggle() {if (code_show){$('div.input').hide();} else {$('div.input').show();}code_show = !code_show} $( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''
    return HTML(toggle_snippet)

# Import the network definition from the single .py file next to this
# notebook, without requiring it to be on sys.path.
model_filename = [i for i in os.listdir('./') if i.count('.py')][0].split('.')[0]
spec = importlib.util.spec_from_file_location(f'{model_filename}', f"./{model_filename}.py")
model_file = importlib.util.module_from_spec(spec)
spec.loader.exec_module(model_file)
# NOTE(review): dir(model_file)[0] takes the alphabetically-first attribute of
# the module and assumes it is the model class — fragile if the model file ever
# gains another top-level name that sorts earlier; confirm when editing.
model=getattr(model_file,dir(model_file)[0])()

# Find the checkpoint file: count('ckpt') == 1 compares equal to True, so
# index(True) returns the first filename containing 'ckpt' exactly once.
# NOTE(review): raises ValueError if no filename contains 'ckpt' exactly once.
ckpt_name = os.listdir()[[*map(lambda x: x.count('ckpt'),os.listdir())].index(True)]
ckpt = torch.load('./'+ckpt_name,map_location=torch.device('cpu'))
print(f'Input size: {60*dim_dict[trained_with]}')
# torchsummary input: 60 minutes x per-minute features; presumably flattened
# inside the model to 60*21 = 1260 (consistent with Linear-1's 80,704 params).
summary(model.float(), input_size=(60,dim_dict[trained_with]))

hide_code()
Input size: 1260
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1                   [-1, 64]          80,704
       BatchNorm1d-2                   [-1, 64]             128
         LeakyReLU-3                   [-1, 64]               0
           Dropout-4                   [-1, 64]               0
            Linear-5                   [-1, 64]           4,160
       BatchNorm1d-6                   [-1, 64]             128
         LeakyReLU-7                   [-1, 64]               0
            Linear-8                   [-1, 64]           4,160
       BatchNorm1d-9                   [-1, 64]             128
        LeakyReLU-10                   [-1, 64]               0
           Linear-11                    [-1, 5]             325
      BatchNorm1d-12                    [-1, 5]              10
        LeakyReLU-13                    [-1, 5]               0
================================================================
Total params: 89,743
Trainable params: 89,743
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.34
Estimated Total Size (MB): 0.35
----------------------------------------------------------------
Out[1]:

 Settings used in training

In [2]:
import pandas as pd
from configs.train_config import cfg

# Pull the rows of interest from each config section, in display order.
general_rows = pd.DataFrame.from_dict(cfg, 'index').loc[['STOCK', 'LOB', 'LIQ_VARS']]
train_rows = pd.DataFrame.from_dict(cfg.TRAIN, 'index').loc[
    ['SHUFFLE', 'VAL_SHUFFLE', 'INTRADAY_SHUFFLE',
     'SPLIT_RATIO', 'BATCH_SIZE', 'VAL_BATCH_SIZE', 'NGPUS']]
model_rows = pd.DataFrame.from_dict(cfg.MODEL, 'index').loc[
    ['BACKBONE', 'LOSS', 'DROPOUT_RATE', 'LEAKY_SLOPE']]
optimizer_rows = pd.DataFrame.from_dict(cfg.OPTIMIZER, 'index').loc[
    ['LR', 'METHOD', 'LR_SCHEDULER', 'LAMBDA']]

# Stack the sections under a two-level index so the table reads section -> key.
configuration = pd.concat(
    [general_rows, train_rows, model_rows, optimizer_rows],
    keys=['USED IN TRAINING', 'TRAIN', 'MODEL', 'OPTIMIZER'])
configuration.columns = ['CONFIGURATIONS']

print(f'Epoch loaded: {ckpt_name[5:-4]}')
configuration
Epoch loaded: 40000
Out[2]:
CONFIGURATIONS
USED IN TRAINING STOCK GARAN
LOB True
LIQ_VARS False
TRAIN SHUFFLE False
VAL_SHUFFLE False
INTRADAY_SHUFFLE False
SPLIT_RATIO 0.8
BATCH_SIZE 202
VAL_BATCH_SIZE 1
NGPUS 1
MODEL BACKBONE FFNN3
LOSS MSE
DROPOUT_RATE 0
LEAKY_SLOPE 0.5
OPTIMIZER LR 0.001
METHOD adam
LR_SCHEDULER ReduceLROnPlateau
LAMBDA 0
In [3]:
import sys
sys.path.append('./../')
import numpy as np
from sklearn.linear_model import Ridge,LinearRegression

# Keep only .npy files (filters out e.g. .DS_Store on macOS).
# FIX: the original popped items from `filenames` while enumerating it, which
# skips the element that follows each removal — filtering into a new list is
# correct regardless of how the unwanted entries are arranged.
filenames = [name for name in os.listdir(cfg.DATA.DATA_PATH) if name.split('.')[-1] == 'npy']
filenames.sort()
if cfg.DATA.PORTION is not None:
    # Optionally restrict the run to the first PORTION of the available days.
    filenames = filenames[:int(len(filenames)*cfg.DATA.PORTION)]

# Chronological split: the first SPLIT_RATIO of the (sorted) days train,
# everything after that validates. Slicing replaces the original append/break
# loop and also leaves val_datanames well-defined when filenames is empty.
n_train = int(len(filenames)*cfg.TRAIN.SPLIT_RATIO)
train_datanames = filenames[:n_train]
val_datanames = [i for i in filenames if not train_datanames.count(i)]

# Each .npy file holds one day's samples as a pickled dict {'X': ..., 'y': ...}.
X_t = [] ; y_t = [] ; X_v = [] ; y_v = []
for i in train_datanames:
    item = np.load(os.path.join(cfg.DATA.DATA_PATH,i),allow_pickle='TRUE').item()
    X_t.append(torch.from_numpy(item['X'])); y_t.append(torch.from_numpy(item['y']))
for i in val_datanames:
    item = np.load(os.path.join(cfg.DATA.DATA_PATH,i),allow_pickle='TRUE').item()
    X_v.append(torch.from_numpy(item['X'])); y_v.append(torch.from_numpy(item['y']))


# Switch the model to double precision (to match the float64 data), load the
# trained weights, disable autograd globally, and enter eval mode so that
# BatchNorm uses running statistics and Dropout is inactive.
model.double();model.load_state_dict(ckpt['model_state'], strict=False);torch.set_grad_enabled(False);model.eval()

# Run every sample through the network one at a time (batch size 1), for the
# training days and the validation days separately.
temp = []
for i in X_t:
    for k in i:
        temp.append(model(torch.reshape(k,(1,*k.shape)))[0])
temp_v = []     
for i in X_v:
    for k in i:
        temp_v.append(model(torch.reshape(k,(1,*k.shape)))[0])

# Sanity checks: concatenating and reshaping to (-1, 5) must preserve the
# per-sample predictions exactly.
for i,k in zip(temp,torch.cat(temp).reshape(-1,5)):
    assert i.tolist() == k.tolist()

for i,k in zip(temp_v,torch.cat(temp_v).reshape(-1,5)):
    assert i.tolist() == k.tolist()

# Stack predictions and targets into (n_samples, 5) tensors.
# NOTE: y_t / y_v are rebound here from lists of per-day tensors to single
# concatenated tensors — later cells rely on this.
y_t_pred = torch.cat(temp).reshape(-1,5) ; y_v_pred = torch.cat(temp_v).reshape(-1,5)
y_t = torch.cat(y_t) ; y_v = torch.cat(y_v)

# Per-sample squared error and absolute percentage error (in %).
# NOTE(review): APE assumes no target value is exactly zero — confirm for the
# variance targets.
se_train = (y_t_pred-y_t)**2 ; se_val = (y_v_pred-y_v)**2
ape_train = 100*(1-y_t_pred/y_t).abs() ; ape_val = 100*(1-y_v_pred/y_v).abs()

# Average over samples (dim=0): one MSE / MAPE value per target variable.
mse_train = se_train.mean(dim=0).numpy() ; mse_val = se_val.mean(dim=0).numpy()
mape_train = ape_train.mean(dim=0).numpy() ; mape_val = ape_val.mean(dim=0).numpy()


##### ##### ##### ##### ##### Linear Regression ##### ##### ##### ##### #####

# Flatten each (60, features) window into one row and prepend a column of
# ones as the intercept term.
X_reg = torch.cat(X_t).flatten(1,2)
X_reg = torch.cat([torch.tensor([[1]]*X_reg.shape[0],dtype=torch.float64),X_reg],1).tolist()
X_v_reg = torch.cat(X_v).flatten(1,2)
X_v_reg = torch.cat([torch.tensor([[1]]*X_v_reg.shape[0],dtype=torch.float64),X_v_reg],1)

# L2-regularized linear regression baseline; alpha=0.0001 matches the value
# quoted in the markdown text above.
regr = Ridge(0.0001)
regr.fit(X_reg,y_t.tolist())
y_t_regs = regr.predict(X_reg) ; y_v_regs = regr.predict(X_v_reg)

# Same error definitions as for the network, against the regression baseline.
# NOTE(review): y_*_regs are numpy arrays while y_t / y_v are torch tensors;
# this mixed arithmetic presumably coerces to tensors (so .abs() works) —
# confirm against the torch version in use.
se_train_reg = (y_t-y_t_regs)**2 ; se_val_reg = (y_v-y_v_regs)**2
ape_train_reg = 100*(1-y_t_regs/y_t).abs() ; ape_val_reg = 100*(1-y_v_regs/y_v).abs()

reg_train_mses = se_train_reg.mean(dim=0).numpy() ; reg_mses = se_val_reg.mean(dim=0).numpy()
reg_train_mapes = ape_train_reg.mean(dim=0).numpy() ; reg_mapes = ape_val_reg.mean(dim=0).numpy()

##### ##### ##### ##### ##### Save Errors ##### ##### ##### ##### #####

# Persist the raw per-sample errors for cross-model comparison elsewhere.
np.save(f'../errors/{trained_with}.npy', {'se_train':se_train.numpy(),'se_val':se_val.numpy(),\
                              'ape_train':ape_train.numpy(),'ape_val':ape_val.numpy()})

np.save(f'../errors/LinReg_{trained_with}.npy', {'se_train':se_train_reg.numpy(),'se_val':se_val_reg.numpy(),
                                     'ape_train':ape_train_reg.numpy(),'ape_val':ape_val_reg.numpy()})

Evaluation plots

Below are five plots, one for each of the five target variables. Each plot shows the model's training and prediction performance throughout the year over each day's trading window, which runs from the 61st trading minute until the last trading minute of the day.

These windows are ordered and stacked, starting from the first trading day's window until the last day's trading window.

On the x-axis we always have the minutes and on the y-axis the corresponding variable of the plot.

Lastly, we made use of a linear regression model with L2-regularization to compare the validation performance of our model with, which can also be seen in the plots below. The regularization parameter is set to 0.0001.

In [4]:
from CODES.utils.plotter import plotter

def get_axis_args(observable,**kwargs):
    """Build the positional argument list for one panel of the plotter.

    ``observable`` is one of the five target-variable names; ``n`` is its
    column index into the prediction/target tensors. The returned nested list
    is consumed by ``plotter`` together with the ``attrs`` method-name list
    defined below it: six data traces (train target, train prediction, val
    target, val prediction, regression train, regression val), six empty
    traces that exist only to supply legend markers, the error table, axis
    limits, labels, title, legend, and grid — in exactly that order.

    Reads the globals y_t, y_t_pred, y_v, y_v_pred, y_t_regs, y_v_regs and
    the mse/mape error arrays; therefore must run after cell In[3].
    """
    # Column index and per-observable axis limits.
    obs_dict = {'Mid Price':0,'Bid Price Expectation':1,'Ask Price Expectation':2,'Bid Price Variance':3,'Ask Price Variance':4}
    ylim_dict = {'Mid Price':[6.7,11.3],'Bid Price Expectation':[6.7,11.3],'Ask Price Expectation':[6.7,11.3],'Bid Price Variance':[-0.0002,0.00045],'Ask Price Variance':[-0.0003,0.00055]}
    n = obs_dict[observable]
    ylabel = 'Price in TL' if n < 3 else 'Variance in TL$^{2}$'
    ylim = ylim_dict[observable]
    # Line widths / opacity tuned separately for price panels (n < 3) and
    # variance panels, which are much noisier.
    lw = 4 if n<3 else 2 ; lw2 = 0.5 if n<3 else 1 ; lw3 = 0.3 if n<3 else 1 ; lw4 = 0.3 if n<3 else 0.1 ; lw5 = 0.1
    alpha = 1 if n<3 else 0.5
    
    # Validation traces are shifted right by len(y_t) so training and
    # validation windows appear consecutively on a shared x-axis.
    axis_args = [
                [ [[i[n].detach().numpy() for i in y_t],'-',dict(color='lightskyblue',linewidth=lw,alpha=alpha)],[[i[n].detach().numpy() for i in y_t_pred],dict(color='orangered',linewidth=lw2)],[range(len(y_t),len(y_t)+len(y_v)),[i[n].detach().numpy() for i in y_v],'-',dict(color='silver',linewidth=lw,alpha=alpha)],[range(len(y_t),len(y_t)+len(y_v_pred)),[i[n].detach().numpy() for i in y_v_pred],'-',dict(color='black',linewidth=lw3,alpha=1)],[range(len(y_t)),y_t_regs[:,n],'-',dict(color='forestgreen',linewidth=lw4,alpha=alpha)],[range(len(y_t),len(y_t)+len(y_v)),y_v_regs[:,n],'-',dict(color='gold',linewidth=lw5,alpha=alpha)]
                 ,[[],dict(color='lightskyblue',marker='o',alpha=alpha,ls='none',fillstyle='left')]             ,[[],dict(color='orangered',marker='o',ls='none',fillstyle='left')],               [[],dict(color='silver',marker='o',alpha=alpha,ls='none',fillstyle='right')],                                              [[],dict(color='black',marker='o',ls='none',fillstyle='right',alpha=1)],                                                         [[],dict(color='forestgreen',marker='o',alpha=alpha,ls='none',fillstyle='left')],       [[],dict(color='gold',marker='o',alpha=alpha,ls='none',fillstyle='right')]
                 ,[dict(cellText=[[mse_train[n],mape_train[n]],[mse_val[n],mape_val[n]],[reg_mses[n],reg_mapes[n]]],rowLabels=['Training Error','Validation Error','Linear Regression Validation Error'],colLabels=['Mean Squared','Mean Absolute Percentage (%)'],loc='lower right'),{'row_scale':2,'col_scale':0.5,'fontsize':16}]
                 ,[-2000,93000],ylim
                 ,['Minutes',dict(fontsize=15)], [ylabel,dict(fontsize=15)], [observable,dict(fontsize=20)]
                 ,[dict(line_order = [[0, 2], [1, 3],[4, 5]],labels=('Training/Validation data', 'Learned/Predicted by Model','Learned/Predicted by Linear Regression'),ncol=1,shadow=1,labelspacing=0.2,fontsize=18,loc='upper left')],[dict(b=True,axis='y',alpha=0.5)]
                ]
                ]
    
    return axis_args


# One panel per target variable, in model-output order.
observables = ['Mid Price', 'Bid Price Expectation', 'Ask Price Expectation',
               'Bid Price Variance', 'Ask Price Variance']
args = [axis_args for obs in observables for axis_args in get_axis_args(obs)]

# Method names applied positionally to the corresponding entries of each
# args item (see get_axis_args for the matching order).
attrs = [
    'plot', 'plot', 'plot', 'plot', 'plot', 'plot',
    'plot', 'plot', 'plot', 'plot', 'plot', 'plot',  # legend marker traces
    'make_table',
    'set_xlim', 'set_ylim',
    'set_xlabel', 'set_ylabel', 'set_title',
    'legend', 'grid',
]

fig = plotter(args, attrs,
              fig_title=f'Converged Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ',
              dpi=300, ncols=1, xpad=5)

Convergence Curves

Below, training and validation errors are plotted against the number of iterations. The corresponding elapsed training time, in minutes, is shown on the top x-axis. Validation errors are reported every fifty epochs starting from the fiftieth epoch, whereas training errors are reported for every epoch.

The errors are given relative to the maximum of the corresponding error type and plotted in log scale as well.

The inset axis is showing the unscaled training and validation errors in logarithmic scale allowing a better comparison between training and validation errors, since the training and validation error maximums differ by a large margin.

The changes in the learning rate during training are also shown, placed at the corresponding iteration number of the transition.

In [5]:
from CODES.utils.DateHandler import *
from CODES.utils.plotter import plotter

def get_num(x):
    """Extract the numeric payload from a log line.

    Finds the first and last numeric characters of ``x`` and takes the
    substring between them (inclusive). If that substring looks like a
    HH:MM:SS clock time (contains exactly two ':'), it is returned as a
    string; otherwise it is converted to float.

    Raises AssertionError if ``x`` contains no numeric character, and
    ValueError if the extracted substring is not a valid float.
    """
    first = None
    last = None

    for i, char in enumerate(x):
        if char.isnumeric():
            first = i
            break

    for i, char in enumerate(x[::-1]):
        if char.isnumeric():
            last = i
            break
    assert first is not None and last is not None

    # FIX: the original sliced x[first:-last], which yields '' when the
    # string ends with a digit (last == 0 makes the slice x[first:0]) and
    # then crashes in float(''). An explicit end index handles that case.
    result = x[first:len(x) - last]
    return result if result.count(':') == 2 else float(result)

# Parse the training log line by line into per-metric series.
log = open("./log.txt", "r")
conv_dict = {'training loss':[],'validation loss':[],'mean absolute':[],'current time':[],'learning rate':[]}

# Stop once one more training-loss entry than the loaded checkpoint's epoch
# count has been collected (the extra entry is popped again); learning-rate
# lines are stored as [iteration_index, value] pairs.
for i,line in enumerate(log.readlines()):
    if len(conv_dict['training loss']) == int(get_num(ckpt_name))+1:
        conv_dict['training loss'].pop(-1)
        break
    else:
        for key in conv_dict:
            if line.strip().lower().count(key):
                if key == 'learning rate':
                    conv_dict[key].append([len(conv_dict['training loss']),get_num(line)])
                else:
                    conv_dict[key].append(get_num(line))

# Elapsed minutes since the first logged timestamp; negative differences mean
# the clock wrapped past midnight, so add 24h.
time_elapsed = [get_time_inbetween(make_clock(conv_dict['current time'][0]),i,'m') for i in map(make_clock,conv_dict['current time'])]
time_elapsed = [i if i>=0 else 24*60+i for i in time_elapsed]
maximums = {}
# Shallow copy: the two dicts initially share the same lists, but every key is
# re-bound to a fresh array below, so `conv_dict` ends up holding the
# max-normalized series and `conv_dict_backup` the raw (unscaled) ones.
conv_dict_backup = conv_dict.copy()
for key in conv_dict:
    
    if ['training loss','validation loss','mean absolute'].count(key):
        # Record the maximum, then normalize the series by it.
        maximums[key] = max(conv_dict[key])
        conv_dict[key] = np.array([i/max(conv_dict[key]) for i in conv_dict[key]])
        conv_dict_backup[key] = np.array(conv_dict_backup[key])
        
    # Consistency checks: one training-loss/timestamp entry per epoch, and one
    # validation entry per VAL_FREQUENCY epochs.
    if ['training loss','current time'].count(key):
        assert int(get_num(ckpt_name)) == len(conv_dict[key]),key
    elif key == 'learning rate':
        conv_dict[key] = np.array(conv_dict[key])
        conv_dict_backup[key] = np.array(conv_dict_backup[key])
    else:
        assert int(get_num(ckpt_name)) == len(conv_dict[key])*cfg.TRAIN.VAL_FREQUENCY,key
        
# Iteration axes: validation points every VAL_FREQUENCY epochs starting at 50
# (NOTE(review): the start offset of 50 presumably equals VAL_FREQUENCY —
# confirm in the training config), training points for every epoch.
x_val = range(50,int(get_num(ckpt_name))+1,cfg.TRAIN.VAL_FREQUENCY)
x_full = [*range(int(get_num(ckpt_name))+1)][1:]
x_ticks = [i for i in x_full if not i%1000 or i ==1]
mult=7  # vertical scale factor separating the true-scale curves (top half) from the log-scale ones (bottom half)
y_ticks = [-10,-5,0,0.5*mult,mult]
y_ticklabels = [i if i<0 else i/mult for i in y_ticks]
lw=0.8
ylim = [-mult,mult]
# Text annotations for each learning-rate change, positioned at the change's
# iteration (normalized to [0, 1] of the x-range).
lr_texts =[[[round(i[0]/(x_full[-1]-x_full[0]),3), 0.85, f'LR set to {i[1]}'],dict(color='darkmagenta',fontsize=20,horizontalalignment='left',verticalalignment='top',rotation=-90)] for i in conv_dict['learning rate']]
# Nudge down any annotation that would overlap its predecessor horizontally.
for i,k in enumerate(lr_texts[:-1]):
    if abs(k[0][0] - lr_texts[i+1][0][0]) < 0.01:
        lr_texts[i+1][0][1] -= 0.4
        
################################################################################################################################################################
################################################################################################################################################################
# Method-name pipeline for the inset axis (used with args3 in the main plot's
# inset_axes entry below).
attrs2 = [
         'plot','plot'
        ,'set_xlim','set_ylim'
        ,'set_xlabel','set_ylabel','set_title'
        ,'vlines'
        ,'legend','grid'
        ]

# NOTE(review): args2 is never referenced later (the inset uses args3) and has
# one entry fewer than attrs2 (no set_title argument) — it appears to be dead
# code kept for reference; confirm before removing.
args2 = [
        [ 
             [x_full,conv_dict_backup['training loss'],'-',dict(color='blue',linewidth=lw,label='Training MSE')],[x_val,conv_dict_backup["validation loss"],'-',dict(color='green',linewidth=lw,label='Validation MSE')]
            ,[1,int(get_num(ckpt_name))],[conv_dict_backup['training loss'].min(),conv_dict_backup['training loss'].max()]
            ,['Iteration No.',dict(fontsize=10)], ['Loss',dict(fontsize=10)]
            ,[conv_dict['learning rate'][:,:1],conv_dict_backup['training loss'].min(),conv_dict_backup['training loss'].max(),'magenta','dashed',dict(linewidth=1,label='Learning Rate Reductions',alpha=0.5)]
            ,[dict(ncol=1,shadow=1,labelspacing=0.2,fontsize=10,loc='upper right')],[dict(b=True,axis='both',alpha=0.2)]
        ]
        ]

################################################################################################################################################################
# Arguments for the inset axis: unscaled training/validation MSE in common
# (base-10) log scale, with vertical dashed lines at learning-rate reductions.
args3 = [
        [ 
             [x_full,np.log10(conv_dict_backup['training loss']),'-',dict(color='blue',linewidth=lw*0.7,label='Training MSE in Common Log Scale')], [x_val,np.log10(conv_dict_backup['validation loss']),'-',dict(color='green',linewidth=lw*0.7,label='Validation MSE in Common Log Scale')]
            ,[1,int(get_num(ckpt_name))],[np.log10(conv_dict_backup['training loss']).min(),np.log10(conv_dict_backup['training loss']).max()]
            ,['Iteration No.',dict(fontsize=10)], ['Loss',dict(fontsize=10)],['Unscaled Mean Squared Errors in Logarithmic Scale',dict(fontsize=15,pad=10)]
            ,[conv_dict['learning rate'][:,:1],np.log10(conv_dict_backup['training loss']).min(),np.log10(conv_dict_backup['training loss']).max(),'magenta','dashed',dict(linewidth=1,label='Learning Rate Reductions',alpha=0.5)]
            ,[dict(ncol=1,shadow=1,labelspacing=0.2,fontsize=10,loc='upper right')],[dict(b=True,axis='both',alpha=0.2)]
        ]
        ]


################################################################################################################################################################
################################################################################################################################################################
# Arguments for the main convergence figure, matched positionally to `attrs`
# below: normalized curves (scaled by `mult`) in the top half, their common-log
# counterparts in the bottom half, ticks/limits, a twin top x-axis showing
# elapsed minutes, labels/title, a divider line plus LR-reduction vlines, the
# log-scale inset (attrs2 + args3), two scale labels, eight LR annotations,
# then legend and grid.
args = [
        [ 
             [x_full,conv_dict['training loss']*mult,'-',dict(color='blue',linewidth=lw,label=r'Training MSE with Maximum $\approx$ '+f'{round(maximums["training loss"],2)}')],[x_val,conv_dict["validation loss"]*mult,'-',dict(color='green',linewidth=lw,label=r'Validation MSE with Maximum $\approx$ '+f'{round(maximums["validation loss"],2)}')],[x_val,conv_dict['mean absolute']*mult,'-',dict(color='red',linewidth=lw,label=r'Validation MAPE with Maximum $\approx$ '+f'{round(maximums["mean absolute"],2)}%')]
            ,[x_full,np.log10(conv_dict['training loss']),'-',dict(color='blue',linewidth=lw)],                                                                                   [x_val,np.log10(conv_dict['validation loss']),'-',dict(color='green',linewidth=lw)],                                                                                     [x_val,np.log10(conv_dict['mean absolute']),'-',dict(color='red',linewidth=lw)]
            ,[x_ticks],[y_ticks],[y_ticklabels],[x_ticks,dict(rotation=-45)]
            ,[1,int(get_num(ckpt_name))],ylim
            ,{'xlabel':dict(xlabel='Elapsed Time in Minutes',fontsize=20,labelpad=10),'tick_locations':x_ticks,'tick_function':lambda x: [int(time_elapsed[i-1]) for i in x]}
            ,['Iteration No.',dict(fontsize=20)], ['Loss in Units of Maximum Loss',dict(fontsize=20)], [f'Training Session with {trained_with}',dict(fontsize=25,pad=40)]
            ,[dict(color='black', lw=1)],[conv_dict['learning rate'][:,:1],ylim[0],ylim[1],'magenta','dashed',dict(linewidth=1,label='Learning Rate Reductions',alpha=0.5)]
            ,dict(bounds=[lr_texts[-2][0][0]+0.035,0.2,lr_texts[-1][0][0]-lr_texts[-2][0][0]-0.05,0.2],attributes=attrs2,args=args3,pipeline=0)
            ,[[0.98, 0.5, 'True Scale'],dict(color='black',fontsize=20,horizontalalignment='right',verticalalignment='bottom')],[[0.98, 0.49, 'Common Logarithm Scale'],dict(color='black',fontsize=20,horizontalalignment='right',verticalalignment='top')]
            ,lr_texts[0],lr_texts[1],lr_texts[2],lr_texts[3],lr_texts[4],lr_texts[5],lr_texts[6],lr_texts[7]
            ,[dict(ncol=1,shadow=1,labelspacing=0.2,fontsize=18,loc='upper right')],[dict(b=True,axis='both',alpha=0.5)]
        ]
        ]


# NOTE(review): the eight explicit lr_texts[0..7] entries assume exactly eight
# learning-rate reductions in the log — this will IndexError for other runs.
attrs = [
         'plot','plot','plot'
        ,'plot','plot','plot'
        ,'set_xticks','set_yticks','set_yticklabels','set_xticklabels'
        ,'set_xlim','set_ylim'
        ,'make_twinx'
        ,'set_xlabel','set_ylabel','set_title'
        ,'axhline','vlines'
        ,'inset_axes'#,'inset_axes'
        ,'text','text'
        ,'text','text','text','text','text','text','text','text'
        ,'legend','grid'
        ]

fig = plotter(args,attrs,fig_title=f'Convergence Graph of Feedforward Model\n Stock: GARAN\n Year: 2017\n Total Epochs:{ckpt_name[5:-4]} ',dpi=300, ncols=1,ypad=10,suptitle_y=1.1,suptitle_x=0.51)#,save_path = os.getcwd());